Load Data

The data can be found at covid19ireland-geohive.hub.arcgis.com, specifically:

The data can be loaded using the read_csv function from the readr package. When loading the data the date column can be changed to a date type by using as.Date, this makes it easier to sort and filter by date.

library(readr)
library(dplyr)

# LaboratoryLocalTimeSeriesHistoricView
testing_csv <- 
  read_csv("http://opendata-geohive.hub.arcgis.com/datasets/f6d6332820ca466999dbd852f6ad4d5a_0.csv?outSR={%22latestWkid%22:3857,%22wkid%22:102100}") %>%
  mutate(DateStamp = as.Date(Date_HPSC))

# Covid19AcuteHospitalHistoricSummaryOpenData
hospital_csv <- 
  read_csv("http://opendata-geohive.hub.arcgis.com/datasets/fe9bb23592ec4142a4f4c2c9bd32f749_0.csv?outSR={%22latestWkid%22:4326,%22wkid%22:4326}")  %>%
  mutate(DateStamp = as.Date(Date))

# ICUBISHistoricTimelinePublicView
icu_csv <- 
  read_csv("https://opendata.arcgis.com/datasets/c8208a0a8ff04a45b2922ae69e9b2206_0.csv") %>%
  mutate(DateStamp = as.Date(extract))

# CovidStatisticsProfileHPSCIrelandOpenData
profile_csv <-
  read_csv("https://opendata.arcgis.com/datasets/d8eb52d56273413b84b0187a4e9117be_0.csv") %>%
  mutate(DateStamp = as.Date(StatisticsProfileDate))
cat("### Tidy and Merge Data

The 3 datasets can be joined together using the `left_join` function. 

Extra statistics can also be calculated, e.g. the number of daily positive tests can be calculated by taking `Positive - lag(Positive)`. The `lag` function returns the previous value for a vector, therefore `Positive - lag(Positive)` will calculate today's value minus yesterday's value.

The `rollsum` function calculates a rolling total, the argument `k = 14` tells the function to calculate 14 day totals.")

Tidy and Merge Data

The 3 datasets can be joined together using the left_join function.

Extra statistics can also be calculated, e.g. the number of daily positive tests can be calculated by taking Positive - lag(Positive). The lag function returns the previous value for a vector, therefore Positive - lag(Positive) will calculate today’s value minus yesterday’s value.

The rollsum function calculates a rolling total, the argument k = 14 tells the function to calculate 14 day totals.

# library(zoo)
my_rolling_sum <- function(x, n_days = 14) zoo::rollsum(x - lag(x), k = n_days, align = "right", fill = NA)

covid_tidy <- 
  testing_csv %>%
  arrange(DateStamp) %>%
  # select(DateStamp, TotalLabs, Positive, Hospitals) %>%
  transmute(DateStamp,
         daily_labs = TotalLabs - lag(TotalLabs),
         daily_positive = Positive - lag(Positive),
         rolling14_labs = my_rolling_sum(TotalLabs),
         rolling14_positive = my_rolling_sum(Positive),
         roling14_percentage = 100*(rolling14_positive/rolling14_labs)) %>%
  left_join(hospital_csv %>%
               arrange(DateStamp) %>%
               select(DateStamp, 
                      hospital_cases = SUM_number_of_confirmed_covid_1,
                      hospital_new = SUM_no_new_admissions_covid19_p,
                      hospital_discharge = SUM_no_discharges_covid19_posit),
            by = "DateStamp") %>%
  left_join(icu_csv %>%
              select(DateStamp, 
                     icu_cases = ncovidconf,
                     icu_new = adcconf),
            by = "DateStamp") 

Summaries of the data can now be calculated, such as the statistics on a month by month basis.

covid_table <- 
  covid_tidy %>%
  mutate(month = format(DateStamp, "%m"), month_name = format(DateStamp, "%B")) %>%
  group_by(month, month_name) %>%
  summarise(total_tests = sum(daily_labs, na.rm = T),
            total_positive = sum(daily_positive, na.rm = T),
            no_obs = length(daily_labs),
            new_hospital = sum(hospital_new, na.rm = T),
            new_icu = sum(icu_new, na.rm = T)) %>%
  mutate(`% Positive` = paste0(round(100*(total_positive/total_tests), 2), "%")) %>%
  select(Month = month_name, `No. of Days Data` = no_obs, 
         Tests = total_tests, Positive = total_positive, `% Positive`,
         `Hospital Admissions` = new_hospital, `ICU Admissions` = new_icu)

t(covid_table[,-1])
January February March April May June July August September October November December
No. of Days Data 31 28 45 33 31 30 31 31 30 31 30 31
Tests 705531 456157 493779 193796 174997 85177 203235 206960 341171 453767 329966 410091
Positive 100232 24177 20533 21171 4901 611 681 2867 7578 25971 11333 24857
% Positive 14.21% 5.3% 4.16% 10.92% 2.8% 0.72% 0.34% 1.39% 2.22% 5.72% 3.43% 6.06%
Hospital Admissions 3228 1311 668 1024 419 85 35 65 177 669 424 539
ICU Admissions 487 206 100 161 59 13 5 9 31 77 70 92

Prepare for Plotting

In order to make it easier to plot multiple variables using ggplot2, the data must be transformed from ‘wide’ to ‘long’. The current format of the data has 1 row for each date, once transformed the data will have a row for each date and variable combination.

Current Format (Wide)

DateStamp hospital_cases icu_cases rolling14_positive roling14_percentage
2021-04-03 242 65 8211 3.3576
2021-04-02 264 62 8365 3.4198
2021-04-01 274 63 8313 3.4827
2021-03-31 297 64 8267 3.5530
2021-03-30 310 65 8228 3.6165
2021-03-29 331 70 8219 3.6700

Transformed Format (Long)

The pivot_longer function will transform the data to a long format. The statistics present in the new data can be chosen from the current columns.

stats_key <- 
  c("hospital_cases" = "Cases in Hospitals",
    "icu_cases" = "Cases in ICU",
    "rolling14_positive" = "Positive Tests (14 Day Total)",
    "roling14_percentage" = "% Positive Tests (14 Day Total)")

testing_plot_data <- 
  covid_tidy %>%
  tidyr::pivot_longer(cols = names(stats_key), 
                      names_to = "stat", values_to = "value") %>%
  mutate(stat = recode_factor(stat, !!!stats_key, .ordered = TRUE))
DateStamp stat value
2021-04-02 rolling14_positive 8365.0000
2021-04-02 roling14_percentage 3.4198
2021-04-03 hospital_cases 242.0000
2021-04-03 icu_cases 65.0000
2021-04-03 rolling14_positive 8211.0000
2021-04-03 roling14_percentage 3.3576

Graphs

# https://en.wikipedia.org/wiki/COVID-19_pandemic_in_the_Republic_of_Ireland
key_dates <- 
  data.frame(event = c("First Lockdown", "Easing Restrictions", "Easing Phase 3", "Midlands Lockdown",  "Dublin L3",  "Level 3", "Level 5", "Level 3", "Level 5-", "Level 5", "Level 5+"),
             datestamp = c("2020-03-15", "2020-05-18",          "2020-06-29",     "2020-08-07",        "2020-09-18", "2020-10-04", "2020-10-19", "2020-12-01", "2020-12-22", "2020-12-30", "2021-01-06")) %>%
  mutate(datestamp = as.Date(datestamp))

key_dates_2w <- 
  key_dates %>%
  mutate(datestamp = datestamp + 14)
library(ggplot2)

covid_plot <- 
  testing_plot_data %>%
  ggplot() +
  geom_line(aes(x = DateStamp, y = value)) +
  geom_vline(data = key_dates, aes(xintercept = datestamp), linetype = 2, colour = "brown") +
  # geom_vline(data = key_dates_2w, aes(xintercept = datestamp), linetype = 3, colour = "red", size = 0.5) +
  geom_text(data = key_dates, mapping = aes(label = event, y = 0, x = datestamp), 
            angle = 90, hjust = -0, vjust = -0.3, size = 2, colour = "brown") +
  # Change y scale to right and have all plots begin scale at 0
  scale_y_continuous(position = "right", limits = c(0, NA)) +
  # Show each month on the x axis
  scale_x_date(date_breaks = "1 month", date_labels = "%b", ) +
  # Create individual plots for each statistic
  facet_wrap(~stat, ncol = 2, scales = "free") +
  # Add labels to the plot
  labs(x = "", y = "",
       title = "Republic of Ireland Covid Stats",
       caption = 'Data: https://opendata-geohive.hub.arcgis.com/') +
  theme_light() +
  theme(text = element_text(size = 15),
        axis.text.x = element_text(size = 10, angle = 0, hjust = 0))

covid_plot

The plot can be saved locally using ggsave

ggsave(file = paste0("~/Desktop/", Sys.Date(), "_covid_roi.png"),
       plot = covid_plot, width = 9, height = 5, units = "in")

Recent Data Only

The data can be filter to show only certain dates using the filter function. The plot below shows data from the beginning of August.

covid_plot_aug <- 
  testing_plot_data %>%  
  # remove any data before "2020-08-01"
  filter(DateStamp >= start_date) %>%
  ggplot() +
  geom_line(aes(x = DateStamp, y = value)) +
  geom_vline(data = key_dates %>% filter(datestamp>= start_date), 
             aes(xintercept = datestamp), linetype = 2, colour = "brown") +
  geom_text(data = key_dates%>% filter(datestamp>= start_date), 
            mapping = aes(label = event, y = 0, x = datestamp), 
            angle = 90, hjust = -0, vjust = -0.3, size = 2, colour = "brown") +
  scale_y_continuous(position = "right", limits = c(0, NA)) +
  scale_x_date(date_breaks = "3 week", date_labels = "%d %b") +
  facet_wrap(~stat, ncol = 2, scales = "free") +
  labs(x = "", y = "",
       title = paste0("Republic of Ireland Since ", format(start_date, "%B")),
       caption = 'Data: https://opendata-geohive.hub.arcgis.com/') +
  theme_light() +
  theme(text = element_text(size = 15))

covid_plot_aug

Breakdown of Cases

by_age_14day <- 
  profile_csv %>%
  select(DateStamp, starts_with("Aged")) %>%
  arrange(DateStamp) %>%
  # Calculate rolling 14 day totals of all columns beginning with 'Aged'
  mutate(across(starts_with("Aged"), my_rolling_sum)) %>%
  # Pivot table to long format for columns beginning with 'Aged'
  tidyr::pivot_longer(cols = starts_with("Aged"), 
                      names_to = "Age_Group", values_to = "Cases")
DateStamp Age_Group Cases
2021-04-03 Aged1to4 634
2021-04-03 Aged5to14 1237
2021-04-03 Aged15to24 1127
2021-04-03 Aged25to34 1182
2021-04-03 Aged35to44 1428
2021-04-03 Aged45to54 895
by_age_14day %>%
  filter(DateStamp > start_date) %>%
  # Change order in which the age groups will appear for plot
  mutate(Age_Group = forcats::fct_relevel(Age_Group, unique(.$Age_Group))) %>%
  # Begin plot
  ggplot(aes(x = DateStamp, y = Cases)) +
  geom_line() +
  # Create a single plot for each age group
  facet_wrap(~Age_Group) +
  # Specify the breaks for dates
  scale_x_date(date_breaks = "1 month", date_labels = "%b") +
  labs(title = "14 Day Incidence by Age Group",
       x = "", y = "") +
  theme_light() +
  theme(text = element_text(size = 16))

# same as above but with "HospitalisedAged"
profile_csv %>%
  select(DateStamp, starts_with("HospitalisedAged")) %>%
  arrange(DateStamp) %>%
  mutate(across(starts_with("HospitalisedAged"), my_rolling_sum)) %>%
  tidyr::pivot_longer(cols = starts_with("HospitalisedAged"), 
                      names_to = "Age_Group", values_to = "Cases") %>%
  filter(DateStamp > start_date) %>%
  mutate(Age_Group = forcats::fct_relevel(Age_Group, unique(.$Age_Group))) %>%
  ggplot(aes(x = DateStamp, y = Cases)) +
  geom_line() +
  facet_wrap(~Age_Group) +
  scale_x_date(date_breaks = "3 week", date_labels = "%d %b") +
  labs(title = "14 Day Admissions to Hospital by Age Group",
       x = "", y = "") +
  theme_light() +
  theme(text = element_text(size = 16))

Combining Cases and Hospital Admissions

case_and_hospital_14day <- 
  profile_csv %>%
  select(DateStamp, starts_with(c("Aged","HospitalisedAged"))) %>%
  # use mutates to consolidate case groupings to match hospital groups
  mutate(Aged4 = Aged1to4, HospitalisedAged4 = HospitalisedAged5, .keep = "unused")%>%
  mutate(across(starts_with(c("Aged","HospitalisedAged")), my_rolling_sum)) %>%
  tidyr::pivot_longer(cols = starts_with(c("Aged","HospitalisedAged")), 
                      names_to = "Age_Group", values_to = "Cases")
DateStamp Age_Group Cases
2021-04-03 Aged4 634
2021-04-03 HospitalisedAged4 11
2021-04-02 Aged4 674
2021-04-02 HospitalisedAged4 11
2021-04-01 Aged4 692
2021-04-01 HospitalisedAged4 8
case_and_hospital_14day <-
  case_and_hospital_14day %>%
  # create new variable 'Hospital' to denote if the row is a value for hospital
  # also remove Hospitalised from hospital age groups to match the cases grouping
  mutate(Hospital = if_else(stringr::str_detect(Age_Group, "Hospital"), "Hospitalised", "Total"),
         Age_Group = stringr::str_replace(Age_Group, "Hospitalised", ""))
DateStamp Age_Group Cases Hospital
2021-04-03 Aged4 634 Total
2021-04-03 Aged4 11 Hospitalised
2021-04-02 Aged4 674 Total
2021-04-02 Aged4 11 Hospitalised
2021-04-01 Aged4 692 Total
2021-04-01 Aged4 8 Hospitalised
case_and_hospital_14day <-
  case_and_hospital_14day %>%
  # pivot on the new 'Hospital' variable
  tidyr::pivot_wider(names_from = Hospital, values_from = Cases)
DateStamp Age_Group Total Hospitalised
2021-04-03 Aged4 634 11
2021-04-02 Aged4 674 11
2021-04-01 Aged4 692 8
2021-03-31 Aged4 690 9
2021-03-30 Aged4 677 9

Plot

# scl is used to scale the lines for the two groups
# We want the max values for both to be the limit for each
scl <- 
  case_and_hospital_14day  %>%
  filter(DateStamp >= start_date) %>%
  summarise(max_tot = max(Total, na.rm = T),
            max_hospital = max(Hospitalised, na.rm = T)) %>%
  mutate(scale = max_tot/max_hospital) %>%
  pull(scale)


# cols are colours which are colourblind friendly
cols <-  
  c("#E69F00", "#56B4E9", "#009E73",
    "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

# library(ggtext)

case_and_hospital_14day  %>%
  filter(DateStamp >= start_date) %>%
  mutate(Age_Group = forcats::fct_relevel(Age_Group, c("Aged4", "Aged5to14"))) %>%
  ggplot() +
  # Add line for total cases
  geom_line(aes(x = DateStamp, y = Total, colour = "Total Cases")) +
  # Add line for hospital admissions
  geom_line(aes(x = DateStamp, y = Hospitalised*scl, colour = "Hospitalised")) +
  # Create a separate plot for each group
  facet_wrap(~Age_Group) +
  # sec_axis scales the hospital admissions data and adds axis labels on the right hand side
  scale_y_continuous(sec.axis = sec_axis(~./scl, name = "14 Day Hopital Admissions")) +
  scale_x_date(date_breaks = "2 weeks", date_labels = "%d %b") +
  # Use the custom colours
  scale_colour_manual(values = cols) +
  labs(title = paste0("<span style='color:", cols[2],";'>14 Day Incidence</span>"," and ",
                      "<span style='color:", cols[1],";'>Hospital Admissions</span>"," by Age Group"),
    # title = "14 Day Incidence and Hospital Admissions by Age Group", 
       x = "", y = "14 Day Incidence", colour= "") +
  theme_light() +
  # Change elements of the plot to match the colours of the lines
  theme(plot.title = ggtext::element_markdown(lineheight = 1.1),
        legend.position = "none",
        legend.margin=margin(-10,0,0,0),
        axis.text.y.right=element_text(colour=cols[1]),
        axis.ticks.y.right=element_line(colour=cols[1]),
        axis.title.y.right=element_text(colour=cols[1]),
        axis.text.y=element_text(colour=cols[2]),
        axis.ticks.y=element_line(colour=cols[2]),
        axis.title.y=element_text(colour=cols[2]),
        text = element_text(size = 16),
        axis.text.x = element_text(size = 12, angle = -30, hjust = 0))

case_and_hospital_14day  %>%
  filter(DateStamp >= (start_date + 30)) %>%
  mutate(Age_Group = forcats::fct_relevel(Age_Group, c("Aged4", "Aged5to14"))) %>%
  ggplot() +
  # Add line for total cases
  geom_tile(aes(x = DateStamp, y = Age_Group, fill = Total), size = 0.1) +
  # scale_fill_viridis_c() +
  # scale_fill_distiller(palette = "RdYlGn", limits = c(0, NA)) +
  scale_fill_viridis_c() +
  scale_x_date(date_breaks = "4 days", date_labels = "%d %b", expand = c(0,0)) +
  coord_equal() +
  # theme_light() +
  # ggthemes::theme_tufte(base_family="Helvetica") +
  labs(x = NULL, y = NULL, fill = NULL,
       title = "14 Day Incidence") +
  theme(axis.ticks.y=element_blank(),
        # axis.text.x = element_text(hjust = 0),
        legend.position = "bottom")

case_and_hospital_14day  %>%
  filter(DateStamp >= (start_date + 30)) %>%
  mutate(Age_Group = forcats::fct_relevel(Age_Group, c("Aged4", "Aged5to14")),
         Total = Total/14) %>%
  ggplot() +
  # Add line for total cases
  geom_tile(aes(x = DateStamp, y = Age_Group, fill = Total), size = 0.1) +
  # scale_fill_viridis_c() +
  # scale_fill_distiller(palette = "RdYlGn", limits = c(0, NA)) +
  scale_fill_viridis_c(limits = c(0, NA)) +
  # viridis::scale_fill_viridis() +
  scale_x_date(date_breaks = "4 days", date_labels = "%d %b", expand = c(0,0)) +
  coord_equal() +
  # theme_light() +
  # ggthemes::theme_tufte(base_family="Helvetica") +
  labs(x = NULL, y = NULL, fill = NULL,
       title = "Average Daily Incidence (14 Day Average)") +
  theme(axis.ticks.y=element_blank(),
        # axis.text.x = element_text(hjust = 0),
        legend.position = "bottom")

  # # Add line for hospital admissions
  # geom_line(aes(x = DateStamp, y = Hospitalised*scl, colour = "Hospitalised")) +
  # # Create a separate plot for each group
  # facet_wrap(~Age_Group) +
  # # sec_axis scales the hospital admissions data and adds axis labels on the right hand side
  # scale_y_continuous(sec.axis = sec_axis(~./scl, name = "14 Day Hopital Admissions")) +
  # scale_x_date(date_breaks = "2 weeks", date_labels = "%d %b") +
  # # Use the custom colours
  # scale_colour_manual(values = cols) +
  # labs(title = paste0("<span style='color:", cols[2],";'>14 Day Incidence</span>"," and ",
  #                     "<span style='color:", cols[1],";'>Hospital Admissions</span>"," by Age Group"),
  #   # title = "14 Day Incidence and Hospital Admissions by Age Group", 
  #      x = "", y = "14 Day Incidence", colour= "") +
  # theme_light()



case_and_hospital_14day  %>%
  filter(DateStamp >= Sys.Date() - 10) %>%
  mutate(Age_Group = forcats::fct_relevel(Age_Group, c("Aged4", "Aged5to14"))) %>%
  ggplot() +
  # Add line for total cases
  geom_tile(aes(y = DateStamp, x = Age_Group, fill = Total), size = 0.1) +
  geom_text(aes(y = DateStamp, x = Age_Group, label = Total)) +
  # scale_fill_viridis_c() +
  # scale_fill_distiller(palette = "RdYlGn", limits = c(0, NA)) +
  scale_fill_viridis_c(limits = c(0, NA)) +
  scale_y_date(date_breaks = "1 days", date_labels = "%d %b", expand = c(0,0)) +
  scale_x_discrete(expand = c(0,0)) +
  # coord_equal() +
  # theme_light() +
  # ggthemes::theme_tufte(base_family="Helvetica") +
  labs(x = NULL, y = NULL, fill = NULL,
       title = "14 Day Incidence") +
  theme(axis.ticks.y=element_blank(),
        # axis.text.x = element_text(hjust = 0),
        legend.position = "none")

Origin of Transmission

profile_csv %>%
  mutate(NoData = 100 - (CommunityTransmission + CloseContact + TravelAbroad),
         NoData = if_else(NoData < 0, 0, NoData)) %>%
  # select(DateStamp, CommunityTransmission, CloseContact, TravelAbroad, NoData) %>%
  tidyr::pivot_longer(cols = c(CommunityTransmission, CloseContact, TravelAbroad),#, NoData
                      names_to = "Origin", values_to = "Perc") %>%
  tidyr::drop_na() %>%
  # mutate(Origin = forcats::fct_relevel(Origin, unique(.$Origin))) %>%
  ggplot(aes(x = DateStamp, y = Perc, colour = Origin)) +
  geom_line(size = 0.8) + 
  scale_y_continuous(labels = function(x) paste0(x, "%"), limits = c(0, 75), position = "right",
                     sec.axis = sec_axis(~., labels = function(x) paste0(x, "%"))) +
  scale_x_date(date_breaks = "1 month", date_labels = "%b") +
  scale_colour_manual(values=cols) +
  labs(title = "Origin of Transmission ",
       y = "", x = "", colour= "") +
  theme_light() +
  theme(legend.margin=margin(-10,0,0,0),
        text = element_text(size = 16),
        legend.position = "bottom") # legend.title=element_blank()